Task 1

getwd()
## [1] "/home/collindabbieri/Documents/AppliedRegressionAnalysis/Labs/Lab7"

Task 2

library(readxl)

prodqual=read_excel("../../Dataxls/Excel/PRODQUAL.XLS")
head(prodqual)
## # A tibble: 6 x 3
##    TEMP PRESSURE QUALITY
##   <dbl>    <dbl>   <dbl>
## 1    80       50    50.8
## 2    80       50    50.7
## 3    80       50    49.4
## 4    80       55    93.7
## 5    80       55    90.9
## 6    80       55    90.9

Answer Example 12.3

a)

Fit the complete second order model to the data

quality is y temp is x1 pressure is x2

\[E(y)=\beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_1x_2+\beta_4x_1^2+\beta_5x_2^2\]

y=prodqual$QUALITY
x1=prodqual$TEMP
x2=prodqual$PRESSURE

TEMP2=prodqual$TEMP^2
PRESS2=prodqual$PRESSURE^2

prodmod=lm(QUALITY~TEMP+PRESSURE+TEMP:PRESSURE+TEMP2+PRESS2,data=prodqual)

summary(prodmod)
## 
## Call:
## lm(formula = QUALITY ~ TEMP + PRESSURE + TEMP:PRESSURE + TEMP2 + 
##     PRESS2, data = prodqual)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.82593 -0.95370  0.07407  1.01852  2.95185 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -5.128e+03  1.103e+02  -46.49  < 2e-16 ***
## TEMP           3.110e+01  1.344e+00   23.13  < 2e-16 ***
## PRESSURE       1.397e+02  3.140e+00   44.51  < 2e-16 ***
## TEMP2         -1.334e-01  6.853e-03  -19.46 6.46e-15 ***
## PRESS2        -1.144e+00  2.741e-02  -41.74  < 2e-16 ***
## TEMP:PRESSURE -1.455e-01  9.692e-03  -15.01 1.06e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.679 on 21 degrees of freedom
## Multiple R-squared:  0.993,  Adjusted R-squared:  0.9913 
## F-statistic: 596.3 on 5 and 21 DF,  p-value: < 2.2e-16

b)

Sketch the response surface (3D plot of model fit)

print(min(prodqual$TEMP))
## [1] 80
print(max(prodqual$TEMP))
## [1] 100
print(min(prodqual$PRESSURE))
## [1] 50
print(max(prodqual$PRESSURE))
## [1] 60
print(prodmod$coefficients)
##   (Intercept)          TEMP      PRESSURE         TEMP2        PRESS2 
## -5127.8990741    31.0963889   139.7472222    -0.1333889    -1.1442222 
## TEMP:PRESSURE 
##    -0.1455000
library(rgl)
return_fitval=function(x1,x2){
  coeff=prodmod$coefficients
  val=coeff[1]+coeff[2]*x1+coeff[3]*x2+coeff[6]*x1*x2+coeff[4]*x1^2+coeff[5]*x2^2
  return(val)
}
x_plot=c()
y_plot=c()
z=c()
x1_pred=seq(80,100,by=0.1)
x2_pred=seq(50,60,by=0.1)
for(i in x1_pred){
  for(j in x2_pred){
    val=return_fitval(i,j)
    x_plot=append(x_plot,i)
    y_plot=append(y_plot,j)
    z=append(z,val)
  }
}


plot3d(x_plot,y_plot,z,type='l',xlab='x1',ylab='x2',zlab='y')
rglwidget()

c)

Test the overall utility of the model (model summary)

summary(prodmod)
## 
## Call:
## lm(formula = QUALITY ~ TEMP + PRESSURE + TEMP:PRESSURE + TEMP2 + 
##     PRESS2, data = prodqual)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.82593 -0.95370  0.07407  1.01852  2.95185 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -5.128e+03  1.103e+02  -46.49  < 2e-16 ***
## TEMP           3.110e+01  1.344e+00   23.13  < 2e-16 ***
## PRESSURE       1.397e+02  3.140e+00   44.51  < 2e-16 ***
## TEMP2         -1.334e-01  6.853e-03  -19.46 6.46e-15 ***
## PRESS2        -1.144e+00  2.741e-02  -41.74  < 2e-16 ***
## TEMP:PRESSURE -1.455e-01  9.692e-03  -15.01 1.06e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.679 on 21 degrees of freedom
## Multiple R-squared:  0.993,  Adjusted R-squared:  0.9913 
## F-statistic: 596.3 on 5 and 21 DF,  p-value: < 2.2e-16

The p value for the F test tells us that the model is adequate for explaining the data.

Task 3

Reproduce example 11.11 using R

x1=c(120.0,65.0,150.0,1073.8,150.0,610.0,88.2,88.2,88.2,90.0,30.0,441.0,441.0,441.0,441.0,627.0,610.0,150.0,1089.5,125.0,120.0,65.0,150.0,150.0,150.0,150.0,610.0,90.0,30.0,441.0,441.0,441.0,441.0,627.0,610.0,30.0)
x2=c(375,750,500,2170,325,1500,399,399,399,1140,325,410,410,410,410,1525,1500,500,2170,750,375,750,500,250,500,325,1500,1140,325,410,410,410,410,1525,1500,325)
y=c(3137,3590,4526,10825,4023,7606,3748,2972,3163,4065,2048,6500,5651,6565,6387,6454,6928,4268,14791,2680,2974,1965,2566,1515,2000,2735,3698,2635,1206,3775,3120,4206,4006,3728,3211,1200)

print(length(x1))
## [1] 36
print(length(x2))
## [1] 36
print(length(y))
## [1] 36

\[E(y) = \beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_1x_2\]

mod2=lm(y~x1+x2+x1:x2)

a)

Test the overall utility of the model using the global F test at \(\alpha=0.05\)

summary(mod2)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x1:x2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2964.74  -914.17    53.46  1028.66  2839.99 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.013e+03  7.324e+02   4.114 0.000254 ***
## x1           3.786e+00  2.135e+00   1.774 0.085659 .  
## x2          -1.529e+00  1.083e+00  -1.412 0.167689    
## x1:x2        3.439e-03  1.540e-03   2.233 0.032662 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1472 on 32 degrees of freedom
## Multiple R-squared:  0.7287, Adjusted R-squared:  0.7033 
## F-statistic: 28.65 on 3 and 32 DF,  p-value: 3.43e-09

The p-value of the F test is significantly less than 0.05, so we can reject the NULL \(H_0:\beta_1=\beta_2=\beta_3=0\). Therefore, the model is adequate at the 95% confidence level.

b)

Test the hypothesis (at \(\alpha=0.05\)) that the slope of the relationship between man-hours (y) and boiler capacity (x1) increases as the design pressure (x2) increases. That is, the capacity and pressure interact positively

Using the T-test, we see a one-tailed p-value of 0.0327/2=0.01635, meaning we can reject the NULL that \(\beta_3=0\) and claim, with 95% confidence, that the two interact positively.

c)

Estimate the change in man-hours (y) for every 1-psi increase in design pressure when boiler capacity is 750.

Estimated \(x_2\) slope=\(\hat{\beta_2}+\hat{\beta_3}x_1=-1.53+0.003(750)=0.72\)

What is the Null for the global F test? \(H_0: \beta_1=\beta_2=\beta_3=0\)

Why is the alternate test in part b) one sided?

The test in part b) is one sided because we are testing for \(H_a:\beta_3>0\) instead of \(H_a:\beta_3\neq 0\)

Derive the formula in part c)

One might naively assume that the slope in \(x_2\) is just \(\hat{\beta_2}\). However, since interaction is present, the rate of change of man-hours with the design pressure depends on \(x_1\). This gives us the equation in c)

Task 4

Answer question 12.24 pg 660 using the WAFER2 data

wafer=read_excel("../../Dataxls/Excel/WAFER2.xls")
head(wafer)
## # A tibble: 6 x 3
##   OXIDE  TIME POLYSIL
##   <dbl> <dbl>   <dbl>
## 1  1059    18     494
## 2  1049    35     853
## 3  1039    52    1090
## 4  1026    52    1058
## 5  1001    18     517
## 6   986    35     882

a)

Write a complete second-order model for polysilicon thickness (y) as a function of oxide thickness (x1) and deposition time (x2)

\[E(y)=\beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_1x_2+\beta_4x_1^2+\beta_5x_2^2\]

b)

Fit the model to the data. Give the least squares prediction equation

OXIDE2=wafer$OXIDE^2
TIME2=wafer$TIME^2
wafmod=lm(POLYSIL~OXIDE+TIME+OXIDE:TIME+OXIDE2+TIME2,data=wafer)
summary(wafmod)
## 
## Call:
## lm(formula = POLYSIL ~ OXIDE + TIME + OXIDE:TIME + OXIDE2 + TIME2, 
##     data = wafer)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.413 -20.210  -2.474  13.432  91.584 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.104e+02  2.022e+02   1.040  0.31358    
## OXIDE       -4.634e-01  2.355e-01  -1.968  0.06662 .  
## TIME         4.521e+01  7.273e+00   6.216 1.24e-05 ***
## OXIDE2       1.824e-04  1.140e-04   1.600  0.12914    
## TIME2       -3.019e-01  9.762e-02  -3.093  0.00698 ** 
## OXIDE:TIME  -7.509e-03  2.404e-03  -3.124  0.00654 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.84 on 16 degrees of freedom
## Multiple R-squared:  0.9849, Adjusted R-squared:  0.9802 
## F-statistic: 209.2 on 5 and 16 DF,  p-value: 5.506e-14

c)

Conduct a test to determine if the quadratic terms in the model are necessary (use \(\alpha=0.05\))

Using the p-value from the T statistic, the quadratic term for the oxide appears to be unnecessary.

d)

Conduct a test to determine if oxide thickness and deposition time interact. Use \(\alpha=0.05\)

The p-value from the T statistic for the interaction term is significantly less than 0.05. Therefore, we can say with 95% confidence that oxide thickness and deposition time interact.

e)

Based on your results, parts c) and d), what model modifications do you recommend? Explain.

I would recommend creating a model with the quadratic oxide term removed. This term had the highest p-value of the T statistic, and is not adequate at the 95% confidence level.

Task 5

Write a function mysecorder() that will take any xls data in the form of columns x1,x2 and y and perform a full MLR analysis

it needs to: Take arguments filename, alpha Produce a list containing -summary y.lm info -Coefficients (pt estimates) (1-\(\alpha\))100% ci for the betas (use ciReg() in s20x) A graph of the estimated trend paraboloid using rgl -graph should have data plotted -and the paraboloid -all axis labelled

Use function on WAFER2 and place output here

library(s20x)
mysecorder=function(filename,alpha=0.05){
  data=read_excel(filename)
  x1=unlist(data[,1])
  x2=unlist(data[,2])
  y=unlist(data[,3])

  
  x12=x1^2
  x22=x2^2
  
  model=lm(y~x1+x2+x1:x2+x12+x22)
  coeff=model$coefficients


  
  cis=ciReg(model,conf.level=1-alpha,print.out=FALSE)
  
  x=seq(min(x1),max(x1),length.out=40)
  y=seq(min(x2),max(x2),length.out=40)
  
  x_plot=c()
  y_plot=c()
  z=c()
  
  for(i in x){
    for(j in y){
      val=coeff[1]+coeff[2]*x+coeff[3]*y+coeff[4]*x^2+coeff[5]*y^2+coeff[6]*x*y
      z=append(z,val)
      x_plot=append(x_plot,i)
      y_plot=append(y_plot,j)
    }
  }
  open3d()
  mfrow3d(2,1)
  plot3d(x1,x2,y)
  
  plot3d(x_plot,y_plot,z,type='l',xlab='x1',ylab='x2',zlab='y')

  
  return(list(summary=summary(model),coefficients=coeff,cis=cis))
  
}

wafmodel=mysecorder("../../Dataxls/Excel/WAFER2.xls")
rglwidget()
wafmodel
## $summary
## 
## Call:
## lm(formula = y ~ x1 + x2 + x1:x2 + x12 + x22)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.413 -20.210  -2.474  13.432  91.584 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.104e+02  2.022e+02   1.040  0.31358    
## x1          -4.634e-01  2.355e-01  -1.968  0.06662 .  
## x2           4.521e+01  7.273e+00   6.216 1.24e-05 ***
## x12          1.824e-04  1.140e-04   1.600  0.12914    
## x22         -3.019e-01  9.762e-02  -3.093  0.00698 ** 
## x1:x2       -7.509e-03  2.404e-03  -3.124  0.00654 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33.84 on 16 degrees of freedom
## Multiple R-squared:  0.9849, Adjusted R-squared:  0.9802 
## F-statistic: 209.2 on 5 and 16 DF,  p-value: 5.506e-14
## 
## 
## $coefficients
##   (Intercept)            x1            x2           x12           x22 
##  2.104132e+02 -4.634462e-01  4.521013e+01  1.824256e-04 -3.019411e-01 
##         x1:x2 
## -7.509131e-03 
## 
## $cis
##             95 % C.I.lower    95 % C.I.upper
## (Intercept)  -2.182835e+02      6.391098e+02
## x1           -9.626031e-01      3.571058e-02
## x2            2.979105e+01      6.062921e+01
## x12          -5.926726e-05      4.241185e-04
## x22          -5.088924e-01     -9.498981e-02
## x1:x2        -1.260452e-02     -2.413740e-03